########################################################
## PROGRAM NAME: 005_coverage.R                       ##
## AUTHOR: MATT MLECZKO                               ##
## INPUTS:                                            ##
##         ccs_final.Rda                              ##
##         002_tract_to_cs.Rda                        ##
##         002_tract_to_pl.Rda                        ##
##         002_nzlu_msasample_2022.Rda                ##
##         002_wrld_nllus_msasample_2006.Rda          ##
##         002_wrld_nllus_place_2006.Rda              ##
##         002_nzlu_muni_2022.Rda                     ##
##         2020_Census_del.Rda                        ##
##         004_allmunis_2009.Rda                      ##
##         004_allmunis_2022.Rda                      ##
##                                                    ##
##                                                    ##
## OUTPUTS:                                           ##
##         005_muni_msa_2009.Rda                      ##
##         005_muni_msa_2022.Rda                      ##
##         005_msa_t2_coverage_rates.csv              ##
##         005_cc_out.Rda                             ##
##         005_msas_keep.Rda                          ##
##         005_msas_keep_alt.Rda                      ##
##         005_munis_cov.Rda                          ##
##                                                    ## 
## PURPOSE: Calculate MSA coverage rates              ##
##                                                    ##
## LIST OF UPDATES:                                   ##
########################################################

#log <- file(# USER DEFINED PATH AND FILE NAME HERE #)
#sink(log, append=TRUE)
#sink(log, append=TRUE, type="message")

## load libraries ##

library(tidycensus)
library(tidyverse)
library(readxl)
library(gdata)

## define paths
## NOTE: the right-hand side below is a placeholder that each user must fill in.
## Until it is, everything after `<-` on that line is a comment, so R keeps
## parsing and takes the next expression (the setwd() call) as the value.
data_path <- # USER DEFINED PATH HERE #

## set working directory
## (the relative file names passed to load(), save() and write.csv() in this
## script are resolved against this directory)
setwd(data_path)

## infix complement of %in%: TRUE where a left-hand value is absent from `table`
`%notin%` <- function(x, table) !(x %in% table)

## Full outer merge of two data frames that also records where each row came
## from, mimicking Stata's `_merge` indicator.
## from user rwbuie at stackoverflow: https://stackoverflow.com/questions/30358401/is-there-a-way-to-create-statas-merge-indicator-variable-with-rs-merge ##
##
## Arguments:
##   x, y  data frames to merge
##   name  character vector of key column name(s), passed to merge() as `by`
## Returns:
##   the merged data frame with an extra column `merge.variable`:
##     1 = key found only in x, 2 = key found only in y, 3 = key found in both
## The columns `df1` and `df2` are used as temporary source tags, so the inputs
## should not already contain columns with those names.
stata.merge <- function(x, y, name) {
  ## tag every row with its source; rep() keeps the assignment valid when an
  ## input has zero rows (a bare scalar would raise a length error there)
  x$df1 <- rep(1, nrow(x))
  y$df2 <- rep(2, nrow(y))

  df <- merge(x, y, by = name, all = TRUE)

  ## a missing tag means the row was absent from that input, so the tags that
  ## are present sum to 1, 2 or 3
  df$merge.variable <- rowSums(df[, c("df1", "df2")], na.rm = TRUE)
  df$df1 <- NULL
  df$df2 <- NULL
  #print(table(df$merge.variable))

  ## return the merged dataframe
  df
}

## load every input .Rda file; each call restores the objects saved in that
## file into the current environment ##
invisible(lapply(c("ccs_final.Rda",
                   "002_tract_to_cs.Rda",
                   "002_tract_to_pl.Rda",
                   "002_nzlu_msasample_2022.Rda",
                   "002_wrld_nllus_msasample_2006.Rda",
                   "002_wrld_nllus_place_2006.Rda",
                   "002_nzlu_muni_2022.Rda",
                   "2020_Census_del.Rda",
                   "004_allmunis_2009.Rda",
                   "004_allmunis_2022.Rda"),
                 load,
                 envir = environment()))

## subset to metro munis ##

## place crosswalk: distinct muni / place / CBSA rows of tract.to.pl.metro
## whose muni ID appears in munis.final.2009
ptm <- tract.to.pl.metro %>%
  filter(GEOID_muni %in% munis.final.2009$GEOID) %>%
  select(GEOID = GEOID_muni,
         placefp,
         placenm,
         cbsa10,
         cbsaname10 = `CBSA Title`) %>%
  unique()

## number of distinct munis in the place crosswalk
length(unique(ptm$GEOID))

## second crosswalk from tract.to.cs.metro: same muni restriction, minus any
## muni already present in ptm
ctm <- tract.to.cs.metro %>%
  filter(GEOID_muni %in% munis.final.2009$GEOID,
         !(GEOID_muni %in% ptm$GEOID)) %>%
  select(GEOID = GEOID_muni,
         GEOID_full = GEOID_muni_full,
         cousubfp,
         cousubnm,
         cbsa10,
         cbsaname10 = `CBSA Title`) %>%
  unique()

## number of distinct munis in the second crosswalk
length(unique(ctm$GEOID))

## sanity checks on the GEOID key of each table, printed in turn:
## range of ID string lengths, the ID class, and whether the IDs are unique

## place crosswalk
range(nchar(ptm$GEOID))
class(ptm$GEOID)
anyDuplicated(ptm$GEOID) == 0L

## second (tract.to.cs.metro) crosswalk
range(nchar(ctm$GEOID))
class(ctm$GEOID)
anyDuplicated(ctm$GEOID) == 0L

## 2009 muni file
range(nchar(munis.final.2009$GEOID))
class(munis.final.2009$GEOID)
anyDuplicated(munis.final.2009$GEOID) == 0L

## 2022 muni file
range(nchar(munis.final.2022$GEOID))
class(munis.final.2022$GEOID)
anyDuplicated(munis.final.2022$GEOID) == 0L

## 2009 ##

## keep only the ID, name and population columns used below
muni.fm.2009 <- munis.final.2009 %>%
  select(GEOID, GEOID_full, NAME, pop_total_2009)

## first pass: merge munis with the place crosswalk (ptm)
muni.msa.2009.m1 <- stata.merge(x = muni.fm.2009,
                                y = ptm,
                                name = "GEOID")

## check merge ##
table(muni.msa.2009.m1$merge.variable, useNA = "ifany")

## keep merges ##
## NOTE(review): `&` binds tighter than `|` in R, so the listed GEOIDs and
## CBSA 46520 are kept whatever their merge.variable or NAME. The parentheses
## below only make that existing grouping explicit -- confirm it is intended.
muni.msa.2009.keep1 <- muni.msa.2009.m1 %>%
  filter((merge.variable == 3 &
            !grepl("district|District|CDP|CCD", NAME)) |
           GEOID %in% c("1150000", "1232400", "1271625", "1522700",
                        "2412150", "2446725", "5364365") |
           cbsa10 %in% c("46520")) %>%
  select(-c(merge.variable, placefp, placenm))

## munis found only in the 2009 file (no ptm match); drop the crosswalk
## columns so they can be re-merged against ctm
muni.msa.2009.nm <- muni.msa.2009.m1 %>%
  filter(merge.variable == 1) %>%
  select(-c(merge.variable,
            GEOID_full,
            placefp,
            cbsa10,
            cbsaname10,
            placenm))

## second pass: merge the unmatched munis with ctm
muni.msa.2009.m2 <- stata.merge(x = muni.msa.2009.nm,
                                y = ctm,
                                name = "GEOID")

## check merge ##
table(muni.msa.2009.m2$merge.variable, useNA = "ifany")

## keep merges ##
## matched rows only, with the same NAME exclusion, skipping anything already
## kept in the first pass
muni.msa.2009.keep2 <- muni.msa.2009.m2 %>%
  filter(merge.variable == 3,
         !grepl("district|District|CDP|CCD", NAME),
         !(GEOID %in% muni.msa.2009.keep1$GEOID)) %>%
  select(-c(merge.variable, cousubfp, cousubnm))

## stack both passes and save the result
muni.msa.2009 <- rbind(muni.msa.2009.keep1, muni.msa.2009.keep2)

save(muni.msa.2009, file = "005_muni_msa_2009.Rda")

## per-CBSA totals over all retained 2009 munis: count and summed population
muni.msa.2009.all <- muni.msa.2009 %>%
  group_by(cbsa10) %>%
  summarise(name = first(cbsaname10),
            n_all = n(),
            totpop = sum(pop_total_2009, na.rm = TRUE),
            .groups = "drop")

summary(muni.msa.2009.all$n_all)

## merge the retained munis with wrld.nllus.2006.wmsas.out
muni.msa.t1.m <- stata.merge(x = muni.msa.2009,
                             y = wrld.nllus.2006.wmsas.out,
                             name = "GEOID")

## check merge ##
table(muni.msa.t1.m$merge.variable, useNA = "ifany")

## keep matches and necessary vars ##
## de-duplicate the matched rows before counting them per CBSA
muni.msa.t1 <- muni.msa.t1.m %>%
  filter(merge.variable == 3) %>%
  select(GEOID, cbsa10, cbsaname10, pop_total_2009) %>%
  unique() %>%
  group_by(cbsa10) %>%
  summarise(name = first(cbsaname10),
            n = n(),
            pop = sum(pop_total_2009, na.rm = TRUE),
            .groups = "drop")

summary(muni.msa.t1$n)

## now, merge both data frames to calculate coverage rate ##

muni.msa.t1.cr.m <- stata.merge(x = muni.msa.t1,
                                y = muni.msa.2009.all,
                                name = "cbsa10")

table(muni.msa.t1.cr.m$merge.variable)

## calculate coverage rate ##
## cr = share of munis matched; cr_popwt = share of population matched
muni.msa.t1.cr <- muni.msa.t1.cr.m %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable) %>%
  mutate(cr = n / n_all,
         cr_popwt = pop / totpop)

## check coverage rate ##
summary(muni.msa.t1.cr$cr)
summary(muni.msa.t1.cr$cr_popwt)

## 2022 ##

## keep only the ID, name and population columns used below
muni.fm.2022 <- munis.final.2022 %>%
  select(GEOID, GEOID_full, NAME, pop_total_2022)

## first pass: merge munis with the place crosswalk (ptm)
muni.msa.2022.m1 <- stata.merge(x = muni.fm.2022,
                                y = ptm,
                                name = "GEOID")

## check merge ##
table(muni.msa.2022.m1$merge.variable, useNA = "ifany")

## keep merges ##
## NOTE(review): `&` binds tighter than `|` in R, so the listed GEOIDs and
## CBSA 46520 are kept whatever their merge.variable or NAME. The parentheses
## below only make that existing grouping explicit -- confirm it is intended.
muni.msa.2022.keep1 <- muni.msa.2022.m1 %>%
  filter((merge.variable == 3 &
            !grepl("district|District|CDP|CCD", NAME)) |
           GEOID %in% c("1150000", "1232400", "1271625", "1522700",
                        "2412150", "2446725", "5364365") |
           cbsa10 %in% c("46520")) %>%
  select(-c(merge.variable, placefp, placenm))

## munis found only in the 2022 file (no ptm match); drop the crosswalk
## columns so they can be re-merged against ctm
muni.msa.2022.nm <- muni.msa.2022.m1 %>%
  filter(merge.variable == 1) %>%
  select(-c(merge.variable,
            GEOID_full,
            placefp,
            cbsa10,
            cbsaname10,
            placenm))

## second pass: merge the unmatched munis with ctm
muni.msa.2022.m2 <- stata.merge(x = muni.msa.2022.nm,
                                y = ctm,
                                name = "GEOID")

## check merge ##
table(muni.msa.2022.m2$merge.variable, useNA = "ifany")

## keep merges ##
## matched rows only, with the same NAME exclusion, skipping anything already
## kept in the first pass
muni.msa.2022.keep2 <- muni.msa.2022.m2 %>%
  filter(merge.variable == 3,
         !grepl("district|District|CDP|CCD", NAME),
         !(GEOID %in% muni.msa.2022.keep1$GEOID)) %>%
  select(-c(merge.variable, cousubfp, cousubnm))

## stack both passes and save the result
muni.msa.2022 <- rbind(muni.msa.2022.keep1, muni.msa.2022.keep2)

save(muni.msa.2022, file = "005_muni_msa_2022.Rda")


## per-CBSA totals over all retained 2022 munis: count and summed population
muni.msa.2022.all <- muni.msa.2022 %>%
  group_by(cbsa10) %>%
  summarise(name = first(cbsaname10),
            n_all = n(),
            totpop = sum(pop_total_2022, na.rm = TRUE),
            .groups = "drop")

summary(muni.msa.2022.all$n_all)


## check ##
## CBSAs whose muni count differs between 2022 (.x) and 2009 (.y); a CBSA with
## no 2009 row gets an NA diff and is therefore dropped by the filter
muni.msa.0922.all <- muni.msa.2022.all %>%
  left_join(muni.msa.2009.all, by = "cbsa10") %>%
  mutate(diff = abs(n_all.x - n_all.y)) %>%
  filter(diff > 0)

## how many CBSAs differ, and by how many munis in total
nrow(muni.msa.0922.all)
sum(muni.msa.0922.all$diff)
msas.to.check <- muni.msa.0922.all$cbsa10

## proceed with 2022 processing ##

## merge the retained 2022 munis with nzlu.2022.final.zri
muni.msa.t2.m <- stata.merge(x = muni.msa.2022,
                             y = nzlu.2022.final.zri,
                             name = "GEOID")

## check merge ##
table(muni.msa.t2.m$merge.variable, useNA = "ifany")

## keep matches and necessary vars ##
## de-duplicate the matched rows before counting them per CBSA
muni.msa.t2 <- muni.msa.t2.m %>%
  filter(merge.variable == 3) %>%
  select(GEOID, cbsa10, cbsaname10, pop_total_2022) %>%
  unique() %>%
  group_by(cbsa10) %>%
  summarise(name = first(cbsaname10),
            n = n(),
            pop = sum(pop_total_2022, na.rm = TRUE),
            .groups = "drop")

summary(muni.msa.t2$n)

## now, merge both data frames to calculate coverage rate ##

muni.msa.t2.cr.m <- stata.merge(x = muni.msa.t2,
                                y = muni.msa.2022.all,
                                name = "cbsa10")

table(muni.msa.t2.cr.m$merge.variable)

## calculate coverage rate ##
## cr = share of munis matched; cr_popwt = share of population matched
muni.msa.t2.cr <- muni.msa.t2.cr.m %>%
  filter(merge.variable == 3) %>%
  select(-merge.variable) %>%
  mutate(cr = n / n_all,
         cr_popwt = pop / totpop)

## check coverage rate ##
summary(muni.msa.t2.cr$cr)
summary(muni.msa.t2.cr$cr_popwt)

## export the 2022 coverage table
write.csv(muni.msa.t2.cr, file = "005_msa_t2_coverage_rates.csv")

## sample restrictions ##
## a CBSA passes a restriction if it meets the threshold in either period,
## so the per-period lists are stacked and then de-duplicated

## (1a) muni coverage rate of at least 5% ##
muni.msa.t1.keep.a <- muni.msa.t1.cr %>%
  filter(cr >= 0.05) %>%
  select(cbsa10)

muni.msa.t2.keep.a <- muni.msa.t2.cr %>%
  filter(cr >= 0.05) %>%
  select(cbsa10)

muni.msa.keep.a <- rbind(muni.msa.t1.keep.a, muni.msa.t2.keep.a)
msas.keep.r1a <- unique(muni.msa.keep.a$cbsa10)

## (1b) pop coverage rate of at least 20% ##
muni.msa.t1.keep.b <- muni.msa.t1.cr %>%
  filter(cr_popwt >= 0.20) %>%
  select(cbsa10)

muni.msa.t2.keep.b <- muni.msa.t2.cr %>%
  filter(cr_popwt >= 0.20) %>%
  select(cbsa10)

muni.msa.keep.b <- rbind(muni.msa.t1.keep.b, muni.msa.t2.keep.b)
msas.keep.r1b <- unique(muni.msa.keep.b$cbsa10)

## (2) central city is present ##

## Rows of nzlu.2022.final.zri whose (place, statename) pair is in the list
## below. Each key is written "place, statename". York, PA is handled outside
## the list because it additionally requires type == "City".
cc.in1 <- local({
  cc_pairs <- c(
    "NewYork, NY", "Newark, NJ", "Jersey, NJ",
    "LosAngeles, CA", "LongBeach, CA", "Anaheim, CA",
    "Chicago, IL",
    "Dallas, TX", "FortWorth, TX",
    "Houston, TX",
    "Washington, DC",
    "Philadelphia, PA", "Camden, NJ", "Wilmington, DE",
    "Miami, FL",
    "Atlanta, GA",
    "Boston, MA",
    "Phoenix, AZ",
    "SanFrancisco, CA", "Oakland, CA",
    "Riverside, CA", "SanBernardino, CA",
    "Detroit, MI",
    "Seattle, WA",
    "Minneapolis, MN", "StPaul, MN",
    "SanDiego, CA",
    "Tampa, FL",
    "Denver, CO",
    "Baltimore, MD",
    "StLouis, MO",
    "Orlando, FL",
    "Charlotte, NC",
    "SanAntonio, TX",
    "Portland, OR",
    "Sacramento, CA",
    "Pittsburgh, PA",
    "Austin, TX",
    "LasVegas, NV",
    "Cincinnati, OH",
    "KansasCity, KS", "Kansas, MO",
    "Columbus, OH",
    "Indianapolis, IN",
    "Cleveland, OH",
    "SanJose, CA",
    "Nashville, TN",
    "VirginiaBeach, VA",
    "Providence, RI",
    "Jacksonville, FL",
    "Milwaukee, WI",
    "Oklahoma, OK",
    "Raleigh, NC",
    "Louisville, KY",
    "Memphis, TN",
    "Richmond, VA",
    "NewOrleans, LA",
    "SaltLake, UT",
    "Hartford, CT",
    "Buffalo, NY",
    "Birmingham, AL",
    "Rochester, NY",
    "GrandRapids, MI",
    "Tucson, AZ",
    "Honolulu, HI",
    "Tulsa, OK",
    "Fresno, CA",
    "Worcester, MA",
    "Omaha, NE",
    "Bridgeport, CT", "Stamford, CT",
    "Greenville, SC",
    "Albuquerque, NM",
    "Bakersfield, CA",
    "Albany, NY",
    "Knoxville, TN",
    "McAllen, TX",
    "BatonRouge, LA",
    "ElPaso, TX",
    "Allentown, PA",
    "NewHaven, CT",
    "Oxnard, CA",
    "NorthPort, FL", "Sarasota, FL", "Bradenton, FL",
    "Columbia, SC",
    "Dayton, OH",
    "Charleston, SC",
    "Stockton, CA",
    "Greensboro, NC", "HighPoint, NC",
    "Boise, ID",
    "CapeCoral, FL", "FortMyers, FL",
    "ColoradoSprings, CO",
    "LittleRock, AR",
    "Lakeland, FL",
    "DesMoines, IA",
    "Akron, OH",
    "Springfield, MA",
    "Poughkeepsie, NY", "Newburgh, NY", "Middletown, NY",
    "Ogden, UT",
    "Madison, WI",
    "WinstonSalem, NC",
    "Provo, UT", "Orem, UT",
    "Deltona, FL", "DaytonaBeach, FL", "OrmondBeach, FL",
    "Syracuse, NY",
    "Durham, NC", "ChapelHill, NC",
    "Wichita, KS",
    "Toledo, OH",
    "Augusta, GA",
    "PalmBay, FL", "Melbourne, FL", "Titusville, FL",
    "Jackson, MS",
    "Harrisburg, PA",
    "Spokane, WA",
    "Scranton, PA",
    "Chattanooga, TN",
    "LancasterCity, PA",
    "Modesto, CA",
    "Portland, ME",
    "Fayetteville, AR", "Springdale, AR", "Rogers, AR",
    "Fayetteville, NC",
    "EastLansing, MI", "Lansing, MI",
    "Youngstown, OH",
    "Lexington, KY",
    "Pensacola, FL",
    "Huntsville, AL",
    "Reno, NV",
    "SantaRosa, CA", "Petaluma, CA",
    "MyrtleBeach, SC",
    "PortStLucie, FL",
    "Lafayette, LA",
    "Springfield, MO",
    "Killeen, TX", "Temple, TX",
    "Visalia, CA",
    "Asheville, NC",
    "Vallejo, CA",
    "SantaMaria, CA", "SantaBarbara, CA",
    "Salinas, CA",
    "Salem, OR",
    "Mobile, AL",
    "Reading, PA",
    "Manchester, NH", "Nashua, NH",
    "CorpusChristi, TX",
    "Brownsville, TX", "Harlingen, TX",
    "FortWayne, IN",
    "Salisbury, MD",
    "Gulfport, MS", "Biloxi, MS",
    "Flint, MI",
    "Savannah, GA",
    "Peoria, IL",
    "Canton, OH",
    "Anchorage, AK",
    "Beaumont, TX", "PortArthur, TX",
    "Shreveport, LA", "BossierCity, LA",
    "Trenton, NJ",
    "Montgomery, AL",
    "Davenport, IA",
    "Tallahassee, FL",
    "Eugene, OR", "Springfield, OR",
    "Ocala, FL",
    "Naples, FL"
  )

  nzlu.2022.final.zri %>%
    filter(paste(place, statename, sep = ", ") %in% cc_pairs |
             (place == "York" & statename == "PA" & type == "City"))
})

cc.in2 <- nzlu.2022.final.zri %>%
  filter((place == "AnnArbor" & statename == "MI") | 
         (place == "Hickory" & statename == "NC") |
         (place == "Morganton" & statename == "NC") |
         (place == "Lenoir" & statename == "NC") | 
         (place == "Huntington" & statename == "WV") |
         (place == "Ashland" & statename == "KY") | 
         (place == "FortCollins" & statename == "CO") |
         (place == "Lincoln" & statename == "NE") | 
         (place == "Gainesville" & statename == "FL") |
         (place == "Rockford" & statename == "IL") | 
         (place == "Boulder" & statename == "CO") |
         (place == "Greeley" & statename == "CO") |
         (place == "Columbus" & statename == "GA") |
         (place == "GreenBay" & statename == "WI") | 
         (place == "Spartanburg" & statename == "SC") | 
         (place == "SouthBend" & statename == "IN") | 
         (place == "Mishawaka" & statename == "IN") | 
         (place == "Lubbock" & statename == "TX") | 
         (place == "Clarksville" & statename == "TN") | 
         (place == "Roanoke" & statename == "VA") |
         (place == "Evansville" & statename == "IN") |
         (place == "Kingsport" & statename == "TN") | 
         (place == "Kennewick" & statename == "WA") |
         (place == "Richland" & statename == "WA") |
         (place == "Olympia" & statename == "WA") | 
         (place == "Hagerstown" & statename == "MD") |
         (place == "Martinsburg" & statename == "WV") |
         (place == "Utica" & statename == "NY") |
         (place == "Duluth" & statename == "MN") |
         (place == "Crestview" & statename == "FL") |
         (place == "FortWaltonBeach" & statename == "FL") |
         (place == "Destin" & statename == "FL") | 
         (place == "Longview" & statename ==  "TX") |
         (place == "Wilmington" & statename == "NC") | 
         (place == "SanLuisObispo" & statename == "CA") |
         (place == "Merced" & statename == "CA") |
         (place == "Waco" & statename ==  "TX") |
         (place == "SiouxFalls" & statename == "SD") |
         (place == "CedarRapids" & statename == "IA") |
         (place == "Bremerton" & statename == "WA") | 
         (place == "Silverdale" & statename == "WA") |
         (place == "PortOrchard" & statename == "WA") | 
         (place == "Atlantic" & statename == "NJ") |
         (place == "Erie" & statename == "PA") |
         (place == "SantaCruz" & statename == "CA") |
         (place == "Watsonville" & statename == "CA") | 
         (place == "Amarillo" & statename == "TX") |
         (place == "Tuscaloosa" & statename ==  "AL") | 
         (place == "Norwich" & statename == "CT") | 
         (place == "NewLondon" & statename == "CT") | 
         (place == "CollegeStation" & statename == "TX") |
         (place == "Bryan" & statename == "TX") |
         (place == "Laredo" & statename == "TX") |
         (place == "Kalamazoo" & statename == "MI") |
         (place == "Lynchburg" & statename == "VA") |
         (place == "Charleston" & statename == "WV") | 
         (place == "Yakima" & statename == "WA") |
         (place == "Fargo" & statename == "ND") | 
         (place == "Binghamton" & statename == "NY") |
         (place == "FortSmith" & statename == "AR") |
         (place == "Appleton" & statename == "WI") |
         (place == "Prescott" & statename == "AZ") |
         (place == "Macon" & statename == "GA") |
         (place == "Tyler" & statename == "TX") |
         (place == "Topeka" & statename == "KS") |
         (place == "Daphne" & statename == "AL") |
         (place == "Fairhope" & statename == "AL") |
         (place == "Foley" & statename == "AL") |
         (place == "Barnstable" & statename == "MA") |
         (place == "Bellingham" & statename == "WA") | 
         (place == "Rochester" & statename == "MN") |
         (place == "Burlington" & statename == "VT") |
         (place == "Lafayette" & statename ==  "IN") |
         (place == "Medford" & statename == "OR") |
         (place == "Champaign" & statename == "IL") |
         (place == "Urbana" & statename == "IL") |
         (place == "LakeCharles" & statename == "LA") |
         (place == "Charlottesville" & statename == "VA") |
         (place == "LasCruces" & statename == "NM") |
         (place == "HiltonHeadIsland" & statename == "SC") |
         (place == "Bluffton" & statename == "SC") |
         (place == "Beaufort" & statename == "SC") | 
         (place == "Athens" & statename == "GA") |
         (place == "LakeHavasu" & statename == "AZ") |
         (place == "Kingman" & statename == "AZ") |
         (place == "Chico" & statename == "CA") |
         (place == "Columbia" & statename == "MO") |
         (place == "Springfield" & statename == "IL") |
         (place == "Johnson" & statename == "TN") |
         (place == "Houma" & statename == "LA") |
         (place == "Thibodaux" & statename == "LA") |
         (place == "Monroe" & statename == "LA") |
         (place == "Elkhart" & statename == "IN") |
         (place == "Goshen" & statename == "IN") |
         (place == "Jacksonville" & statename == "NC") |
         (place =="Yuma" & statename == "AZ") |
         (place == "Gainesville" & statename == "GA") |
         (place == "Florence" & statename == "SC") |
         (place == "StCloud" & statename == "MN") |
         (place == "Bend" & statename == "OR") |
         (place =="Racine" & statename == "WI") |
         (place == "WarnerRobins" & statename == "GA") |
         (place == "SaginawCity" & statename == "MI") |
         (place == "PuntaGorda" & statename == "FL") |
         (place == "TerreHaute" & statename == "IN") |
         (place == "Billings" & statename == "MT") |
         (place == "Redding" & statename == "CA") |
         (place == "Dover" & statename == "DE") |
         (place == "Kingston" & statename == "NY") |
         (place == "Joplin" & statename == "MO") |
         (place == "Yuba" & statename == "CA") |
         (place == "Jackson" & statename == "TN") |
         (place == "StGeorge" & statename == "UT") |
         (place == "ElCentro" & statename == "CA") |
         (place == "BowlingGreen" & statename == "KY") |
         (place == "Abilene" & statename == "TX") |
         (place == "Muskegon" & statename == "MI") |
         (place == "Iowa" & statename == "IA") |
         (place == "Midland" & statename == "TX") |
         (place == "PanamaCity" & statename == "FL") |
         (place == "Auburn" & statename == "AL") |
         (place == "Hattiesburg" & statename == "MS") |
         (place == "EauClaire" & statename == "WI") |
         (place == "Oshkosh" & statename == "WI") |
         (place == "Burlington" & statename == "NC") |
         (place == "CoeurdAlene" & statename == "ID") |
         (place == "Bloomington" & statename == "IL") |
         (place == "Greenville" & statename == "NC") |
         (place == "CedarFalls" & statename == "IA") |
         (place == "Waterloo" & statename == "IA") |
         (place == "EastStroudsburg" & statename == "PA") |
         (place == "Pueblo" & statename == "CO") |
         (place == "Wausau" & statename == "WI") |
         (place == "Blacksburg" & statename == "VA") |
         (place == "Christiansburg" & statename == "VA") |
         (place == "Odessa" & statename == "TX") |
         (place == "Kahului" & statename == "HI") |
         (place == "Janesville" & statename == "WI") |
         (place == "BeloitCity" & statename == "WI" & type == "City") |
         (place == "Bloomington" & statename == "IN") |
         (place == "Jackson" & statename == "MI") |
         (place == "Sebastian" & statename == "FL") |
         (place == "VeroBeach" & statename == "FL") |
         (place == "StateCollege" & statename == "PA") |
         (place == "IdahoFalls" & statename == "ID") |
         (place == "Decatur" & statename == "AL") |
         (place == "Madera" & statename == "CA") |
         (place == "Chambersburg" & statename == "PA") |
         (place == "Waynesboro" & statename == "PA") |
         (place == "GrandJunction" & statename == "CO") |
         (place == "Elizabethtown" & statename == "KY") |
         (place == "SantaFe" & statename == "NM") |
         (place == "Monroe" & statename == "MI") |
         (place == "Niles" & statename == "MI") |
         (place == "Vineland" & statename == "NJ") |
         (place == "HomosassaSprings" & statename == "FL") |
         (place == "Hanford" & statename == "CA") |
         (place == "Bangor" & statename == "ME") |
         (place == "Alexandria" & statename == "LA") |
         (place == "Dothan" & statename == "AL") |
         (place == "Florence" & statename == "AL") |
         (place == "MuscleShoals" & statename == "AL") |
         (place == "Jefferson" & statename == "MO") |
         (place == "SiouxCity" & statename == "IA") |
         (place == "Albany" & statename == "GA") |
         (place == "WichitaFalls" & statename == "TX") |
         (place == "Valdosta" & statename == "GA") |
         (place == "Texarkana" & statename == "TX") |
         (place == "Logan" & statename == "UT") |
         (place == "Flagstaff" & statename == "AZ") |
         (place == "RockyMount" & statename == "NC") |
         (place == "Lebanon" & statename == "PA") |
         (place == "Dalton" & statename == "GA") |
         (place == "Morristown" & statename == "TN") |
         (place == "Winchester" & statename == "VA") |
         (place == "Morgantown" & statename == "WV") |
         (place == "LaCrosse" & statename == "WI") |
         (place == "Wheeling" & statename == "WV") |
         (place == "Rapid" & statename == "SD") |
         (place == "Napa" & statename == "CA") |
         (place == "Sumter" & statename == "SC") |
         (place == "Springfield" & statename == "OH") |
         (place == "Harrisonburg" & statename == "VA") |
         (place == "Sherman" & statename == "TX") |
         (place == "BattleCreek" & statename == "MI") |
         (place == "Jonesboro" & statename == "AR") |
         (place == "Manhattan" & statename == "KS") |
         (place == "Bismarck" & statename == "ND") |
         (place == "Johnstown" & statename == "PA") |
         (place == "Carbondale" & statename == "IL") |
         (place == "Marion" & statename == "IL") |
         (place == "Hammond" & statename == "LA") |
         (place == "TheVillages" & statename == "FL") |
         (place == "MountVernon" & statename == "WA") |
         (place == "Pittsfield" & statename == "MA") |
         (place == "Albany" & statename == "OR") |
         (place == "GlensFalls" & statename == "NY") |
         (place == "Lawton" & statename == "OK") |
         (place == "Cleveland" & statename == "TN") |
         (place == "SierraVista" & statename == "AZ") |
         (place == "Douglas" & statename == "AZ") |
         (place == "Staunton" & statename == "VA") |
         (place == "Ames" & statename == "IA") |
         (place == "Mansfield" & statename == "OH") |
         (place == "SanAngelo" & statename == "TX") |
         (place == "Altoona" & statename == "PA") |
         (place == "NewBern" & statename == "NC") |
         (place == "Wenatchee" & statename == "WA") |
         (place == "Farmington" & statename == "NM") |
         (place == "Owensboro" & statename == "KY") |
         (place == "StJoseph" & statename == "MO") |
         (place == "Lawrence" & statename == "KS") |
         (place == "Sheboygan" & statename == "WI") |
         (place == "Missoula" & statename == "MT") |
         (place == "Goldsboro" & statename == "NC") |
         (place == "Weirton" & statename == "WV") |
         (place == "Steubenville" & statename == "OH") |
         (place == "Watertown" & statename == "NY") |
         (place == "Anniston" & statename == "AL") |
         (place == "Oxford" & statename == "AL") |
         (place == "Beckley" & statename == "WV") |
         (place == "TwinFalls" & statename == "ID") |
         (place == "Williamsport" & statename == "PA") |
         (place == "California" & statename == "MD") |
         (place == "LexingtonPark" & statename == "MD") |
         (place == "Brunswick" & statename == "GA") |
         (place == "MichiganCity" & statename == "IN") |
         (place == "LaPorte" & statename == "IN") |
         (place == "Muncie" & statename == "IN") |
         (place == "Lewiston" & statename == "ME") |
         (place == "Auburn" & statename == "ME") |
         (place == "Longview" & statename == "WA") |
         (place == "Kankakee" & statename == "IL") |
         (place == "Ithaca" & statename == "NY") |
         (place == "GrandForks" & statename == "ND") |
         (place == "FondDuLac" & statename == "WI") |
         (place == "Decatur" & statename == "IL") |
         (place =="BayCity" & statename == "MI") |
         (place == "Gettysburg" & statename == "PA") |
         (place == "Mankato" & statename == "MN") |
         (place == "Gadsden" & statename == "AL") |
         (place == "Lima" & statename == "OH") |
         (place == "AvonPark" & statename == "FL") |
         (place == "Sebring" & statename == "FL") |
         (place == "Cheyenne" & statename == "WY") |
         (place == "HotSprings" & statename == "AR") |
         (place == "Dubuque" & statename == "IA") |
         (place == "Rome" & statename == "GA") |
         (place == "Victoria" & statename == "TX") |
         (place == "CapeGirardeau" & statename == "MO") |
         (place == "Fairbanks" & statename == "AK") |
         (place == "OceanCity" & statename == "NJ") |
         (place == "Corvallis" & statename == "OR") |
         (place == "Cumberland" & statename == "MD") |
         (place == "Pocatello" & statename == "ID") |
         (place == "Parkersburg" & statename == "WV") |
         (place == "Vienna" & statename == "WV") | 
         (place == "GrantsPass" & statename == "OR") |
         (place == "PineBluff" & statename == "AR") |
         (place == "GreatFalls" & statename == "MT") |
         (place == "Elmira" & statename == "NY") |
         (place == "Kokomo" & statename == "IN") |
         (place == "Midland" & statename == "MI") |
         (place == "Bloomsburg" & statename == "PA") |
         (place == "Berwick" & statename == "PA") |
         (place == "Columbus" & statename == "IN") |
         (place == "Hinesville" & statename == "GA") |
         (place == "Casper" & statename == "WY") |
         (place == "GrandIsland" & statename == "NE") |
         (place == "Danville" & statename == "IL") |
         (place == "Lewiston" & statename == "ID") |
         (place == "Enid" & statename == "OK") |
         (place == "WallaWalla" & statename == "WA") |
         (place == "Carson" & statename == "NV"))

## stack cc.in1 and cc.in2 into a single data frame ##

cc.in <- rbind(cc.in1, cc.in2)

## keep only the GEOID column for the saved output ##

cc.out <- cc.in %>%
  select(GEOID)

## write into the working directory (set to data_path at the top of the
## program), the same way the other 005_* outputs are saved; pasting
## data_path straight onto the file name produced a wrong path whenever
## data_path had no trailing separator ##

save(cc.out,
     file = "005_cc_out.Rda")

## merge cc.in with muni.msa.2009 on GEOID, then tabulate the merge
## indicator (NA shown if present) ##
ccm1 <- stata.merge(cc.in, muni.msa.2009, "GEOID")
table(ccm1[["merge.variable"]], useNA = "ifany")

## same merge and tabulation against muni.msa.2022 ##
ccm2 <- stata.merge(cc.in, muni.msa.2022, "GEOID")
table(ccm2[["merge.variable"]], useNA = "ifany")

## rows with merge.variable == 3 (presumably the Stata-style code for
## records found in both inputs -- confirm against stata.merge) ##

## 2009 merge: keep only the cbsa10 column of those rows ##
ccm1.keep <- select(filter(ccm1, merge.variable == 3), cbsa10)

## 2022 merge: full rows kept in ccm2.look for inspection ##
ccm2.look <- filter(ccm2, merge.variable == 3)

## 2022 merge: cbsa10 column of the same rows ##
ccm2.keep <- select(ccm2.look, cbsa10)

## which central cities are missing? ##

## rows with merge.variable == 2 in the 2022 merge (presumably records
## present only in muni.msa.2022 -- confirm against stata.merge) ##
ccm2.miss <- ccm2 %>%
  filter(merge.variable == 2) %>%
  select(GEOID, NAME) %>%
  mutate(
    ## text of NAME before its first comma (NAME unchanged if no comma) ##
    place = gsub("^(.*?),.*", "\\1", NAME),
    ## first two characters of GEOID ##
    state_fips = substr(GEOID, start = 1, stop = 2)
  ) %>%
  ## keep only rows that also appear in ccs.final ##
  inner_join(ccs.final, by = c("place", "state_fips"))

## pool the cbsa10 values kept from the 2009 and 2022 merges ##
ccm.keep <- rbind(ccm1.keep, ccm2.keep)

## distinct cbsa10 codes across both merges ##
msas.keep.r2 <- unique(ccm.keep[["cbsa10"]])

## apply restrictions ##

## codes present in both msas.keep.r1a and msas.keep.r2, stored in a
## one-column data frame named CBSA ##
msas.keep <- data.frame(CBSA = intersect(msas.keep.r1a, msas.keep.r2))

save(msas.keep, file = "005_msas_keep.Rda")

## alternative list: codes present in both msas.keep.r1b and
## msas.keep.r2, stored in a one-column data frame named CBSA ##
msas.keep.alt <- data.frame(CBSA = intersect(msas.keep.r1b, msas.keep.r2))

save(msas.keep.alt, file = "005_msas_keep_alt.Rda")

## number of munis per MSA pre-treatment covariate ## 

## keep four columns of muni.msa.2009.all, renaming cbsa10 -> CBSA and
## n_all -> munis_tot as part of the selection ##
munis.per <- muni.msa.2009.all %>%
  select(CBSA = cbsa10,
         name,
         munis_tot = n_all,
         totpop)

save(munis.per, file = "005_munis_cov.Rda")


## END OF PROGRAM ##

#sink()